
# Read the Dayflow gauge table and keep only the gauges tied to a usable COMID.
#
# dayflow_gauge_table: path of the delimited gauge table, handed to vroom().
#
# Returns the columns Source, ID (from Gauge_ID), year_ini, year_end,
# lat (from Lat), lon (from Lon) and COMID, restricted to rows whose COMID is
# present and greater than zero.
clean_dayflow_gauge_table <- function(dayflow_gauge_table) {

  gauges <- vroom(
    dayflow_gauge_table,
    col_select = c(
      Source,
      ID = Gauge_ID,
      year_ini, year_end,
      lat = Lat, lon = Lon,
      COMID
    ),
    col_types = c(COMID = "i"), # COMID is parsed as integer
    na = "-9999"                # -9999 is the missing-value marker
  )

  # A missing COMID marks an excluded gauge; a non-positive COMID marks a
  # Canadian case. Both kinds of row are dropped.
  filter(gauges, !is.na(COMID) & COMID > 0)
}

# Read the EHA shapefile as a plain attribute table.
#
# EHA_shp: path of the shapefile, read quietly with st_read().
#
# Returns a tibble holding every attribute column; the `geometry` column is
# removed.
read_EHA <- function(EHA_shp) {

  eha_layer <- st_read(EHA_shp, quiet = TRUE)

  # Convert to a tibble first, then drop the geometry column.
  eha_attributes <- as_tibble(eha_layer)
  select(eha_attributes, -geometry)
}

# ---- Patch for updating power plant-USGS gage in the HILARRI dataset ----Start
# Read the HILARRI table and apply the USGS gage amendments to it.
#
# HILARRI_gpkg:       path of the HILARRI GeoPackage, read with st_read().
# HILARRI_amendments: path of a CSV that is joined on `eia_ptid` and supplies
#                     a `usgs_gage_new` column.
#
# Returns the HILARRI attributes (without the `geom` column) in which
# `usgs_gage` is replaced by `usgs_gage_new` wherever an amendment exists.
# The helper column `usgs_gage_new` is not part of the result.
read_HILARRI <- function(HILARRI_gpkg, HILARRI_amendments) {

  # Attribute table only: the geometry column is not needed downstream.
  hilarri_attributes <- select(as_tibble(st_read(HILARRI_gpkg)), -geom)

  gage_amendments <- read_csv(HILARRI_amendments, col_types = "c")

  hilarri_joined <- left_join(
    hilarri_attributes,
    gage_amendments,
    by = "eia_ptid"
  )

  # Rows without an amendment have a missing usgs_gage_new after the join and
  # therefore keep their original gage.
  hilarri_patched <- mutate(
    hilarri_joined,
    usgs_gage = if_else(is.na(usgs_gage_new), usgs_gage, usgs_gage_new)
  )

  select(hilarri_patched, -usgs_gage_new)
}

# Build the table of target hydropower plants, one row per EHA complex.
#
# Arguments:
#   EHA                EHA plant table; the columns EHA_PtID, Type, PtName and
#                      CH_MW are used.
#   HILARRI            HILARRI table; the columns eha_cmplx, eia_ptid, state,
#                      longitude, latitude, nhdv2comid and huc_12 are used.
#   gen_aCF_1980_2022  table with the columns EIA_ID, year and nameplate.
#
# Besides its arguments the function reads `latest_EIA_year` and
# `capacity_cutoff_MW`. These are not defined here and must be visible from
# the environment in which the function is defined (e.g. as globals).
#
# Returns a tibble with one row per eha_cmplx and the columns plant_name,
# CH_MW, lon, lat, nhdv2comid and huc_12. Only complexes whose summed CH_MW
# exceeds `capacity_cutoff_MW` are returned.
prepare_target_dams <- function(EHA, HILARRI, gen_aCF_1980_2022) {

  # Nameplate capacity of every plant in the latest EIA year.
  latest_nameplate <- gen_aCF_1980_2022 |>
    filter(year == latest_EIA_year) |>
    select(EIA_ID, nameplate)

  # For each EHA complex, pick the linked EIA plant with the largest nameplate
  # capacity. The AK/HI exclusion must run BEFORE select(): select() keeps only
  # eha_cmplx and the plant ID, so `state` is no longer a column afterwards.
  # NOTE(review): assumes HILARRI carries a `state` column of two-letter
  # codes -- confirm against the dataset.
  dominant_plant <- HILARRI |>
    filter(!is.na(eha_cmplx), !is.na(eia_ptid),
           !(state %in% c("AK", "HI"))) |>
    select(eha_cmplx, EIA_ID = eia_ptid) |>
    unique() |>
    left_join(latest_nameplate, by = join_by(EIA_ID)) |>
    filter(!is.na(nameplate)) |>
    arrange(-nameplate) |>
    summarise(EIA_ID = first(EIA_ID),
              .by = eha_cmplx)

  # Location attributes of the dominant plant of each complex.
  hilarri_locations <- dominant_plant |>
    left_join(HILARRI, by = c("eha_cmplx", "EIA_ID" = "eia_ptid")) |>
    select(eha_cmplx, lon = longitude, lat = latitude,
           nhdv2comid, huc_12) |>
    unique()

  # The complex ID is EHA_PtID with its underscore suffix removed. Plants of
  # Type "PS" are excluded.
  eha_plants <- EHA |>
    mutate(eha_cmplx = str_replace(EHA_PtID, "\\_..*", "")) |>
    filter(Type != "PS") |>
    select(eha_cmplx, plant_name = PtName, CH_MW)

  duplicated_cmplx <- eha_plants |>
    filter(duplicated(eha_cmplx)) |>
    pull(eha_cmplx)

  # Collapse complexes that appear more than once into a single row: names are
  # joined with " / " and capacities are summed. A grouped summarise also
  # copes with the case of no duplicated complex (zero-row result). Rows
  # without a complex ID cannot be merged meaningfully and are left out.
  merged_plants <- eha_plants |>
    filter(eha_cmplx %in% duplicated_cmplx, !is.na(eha_cmplx)) |>
    summarise(plant_name = paste(plant_name, collapse = " / "),
              CH_MW = sum(CH_MW),
              .by = eha_cmplx)

  target_plants <- eha_plants |>
    filter(!eha_cmplx %in% duplicated_cmplx) |>
    bind_rows(merged_plants) |>
    arrange(-CH_MW) |>
    filter(CH_MW > capacity_cutoff_MW) |>
    left_join(hilarri_locations, by = join_by(eha_cmplx)) |>
    unique()

  # A complex can still match several HILARRI location rows; keep the first
  # match so that every complex appears exactly once.
  target_plants |>
    group_by(eha_cmplx) |>
    summarise(plant_name = first(plant_name),
              CH_MW = first(CH_MW),
              lon = first(lon), lat = first(lat),
              nhdv2comid = first(nhdv2comid),
              huc_12 = first(huc_12))
}

# Merge the POI and the supplementary Dayflow daily tables into one table.
#
# Dayflow_hydro_dams_daily_POI,
# Dayflow_hydro_dams_daily_supplement: paths of parquet files. Both tables
#   must contain a `date` column; the remaining columns are presumably one
#   daily flow series per COMID (TODO confirm).
#
# Returns the supplement columns that the POI table does not already provide,
# followed by the POI columns, matched on `date`. The rows are those of the
# supplement (left join).
combine_dayflow_data <- function(Dayflow_hydro_dams_daily_POI,
                                 Dayflow_hydro_dams_daily_supplement) {

  POI <- read_parquet(Dayflow_hydro_dams_daily_POI)
  Sup <- read_parquet(Dayflow_hydro_dams_daily_supplement)

  # Series already supplied by the POI table. The join key is excluded by name
  # so that the result does not depend on `date` being the first column.
  existing_comids <- setdiff(names(POI), "date")

  # Where both tables carry a series, the POI version wins: it is removed from
  # the supplement before joining.
  Sup |>
    select(-any_of(existing_comids)) |>
    left_join(POI, by = "date")
}